/*
 * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
 * And Contributors.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. All modifications to the source code must be clearly marked as
 *    such.  Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>

#include "mythes.hxx"

// some basic utility routines

// string duplication routine
//
// Copies the null terminated string p into freshly malloc'ed storage.
// returns: the copy (to be released with free()), or NULL when p is NULL
//          or the allocation fails
char *MyThes::mystrdup(const char * p)
{
    if (!p)
        return NULL;

    // size_t keeps the full length; an int could truncate a huge string
    size_t sl = strlen(p) + 1;
    char * d = (char *)malloc(sl);
    if (d)
        memcpy(d,p,sl);
    return d;
}

// Find the first occurrence of character c in the null terminated
// string d (uses strchr).
// returns: zero-based position of the match, or -1 when c is absent
int MyThes::mystr_indexOfChar(const char * d, int c)
{
    const char * hit = strchr(d,c);
    return hit ? (int)(hit - d) : -1;
}

// remove cross-platform text line end characters
//
// Strips one trailing line terminator from s in place: a final '\n' or
// '\r', plus a '\r' directly in front of it (so "\r\n" is removed as a
// unit).  A '\r' that is not part of the trailing terminator is left
// untouched.  A NULL s is ignored.
void MyThes::mychomp(char * s)
{
    if (!s)
        return;

    size_t k = strlen(s);
    if ((k > 0) && ((s[k-1] == '\r') || (s[k-1] == '\n')))
    {
        s[--k] = '\0';
        // only look for the preceding '\r' when a terminator was really
        // removed; otherwise text such as "a\rb" would be cut to "a"
        if ((k > 0) && (s[k-1] == '\r'))
            s[k-1] = '\0';
    }
}

// Construct a thesaurus from an index file (idxpath) and a data file
// (datpath).  When initialization fails an error is written to stderr
// and the object is left empty: pdfile stays NULL, so Lookup() returns 0.
MyThes::MyThes(const char* idxpath, const char * datpath)
{
    nw = 0;
    encoding = NULL;
    list = NULL;
    offst = NULL;
    pdfile = NULL;

    if (thInitialize(idxpath, datpath) != 1)
    {
        fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
        fflush(stderr);

        // release everything thInitialize managed to allocate, including
        // the individual index words
        if (list)
        {
            for (int i = 0; i < nw; i++)
            {
                if (list[i])
                    free(list[i]);
            }
            free((void*)list);
        }
        if (offst)
            free((void*)offst);
        if (encoding)
            free((void*)encoding);

        // reset the members so the destructor does not touch or free
        // the released memory a second time
        list = NULL;
        offst = NULL;
        encoding = NULL;
        nw = 0;
        // did not initialize properly - throw exception?
    }
}

// Tear down the thesaurus: thCleanup() closes the data file and frees
// the index, then the encoding string is released here.
MyThes::~MyThes()
{
    /* a failed cleanup is ignored - throw exception? */
    (void) thCleanup();

    // free() accepts NULL, so no guard is required
    free((void*)encoding);

    encoding = NULL;
    list = NULL;
    offst = NULL;
}

int MyThes::thInitialize(const char* idxpath, const char* datpath)
{

    // open the index file
    FILE * pifile = fopen(idxpath,"r");
    if (!pifile)
    {
        pifile = NULL;
        return 0;
    }

    // parse in encoding and index size */
    char * wrd;
    wrd = (char *)calloc(1, MAX_WD_LEN);
    int len = readLine(pifile,wrd,MAX_WD_LEN);
    encoding = mystrdup(wrd);
    len = readLine(pifile,wrd,MAX_WD_LEN);
    int idxsz = atoi(wrd);


    // now allocate list, offst for the given size
    list = (char**)   calloc(idxsz,sizeof(char*));
    offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));

    if ( (!(list)) || (!(offst)) )
    {
        fprintf(stderr,"Error - bad memory allocation\n");
        fflush(stderr);
        return 0;
    }

    // now parse the remaining lines of the index
    len = readLine(pifile,wrd,MAX_WD_LEN);
    while (len > 0)
    {
        int np = mystr_indexOfChar(wrd,'|');
        if (nw < idxsz)
        {
            if (np >= 0)
            {
                *(wrd+np) = '\0';
                list[nw] = (char *)calloc(1,(np+1));
                memcpy((list[nw]),wrd,np);
                offst[nw] = atoi(wrd+np+1);
                nw++;
            }
        }
        len = readLine(pifile,wrd,MAX_WD_LEN);
    }

    free((void *)wrd);
    fclose(pifile);
    pifile=NULL;

    /* next open the data file */
    pdfile = fopen(datpath,"r");
    if (!pdfile)
    {
        pdfile = NULL;
        return 0;
    }

    return 1;
}

// Release the resources held by the thesaurus: closes the data file and
// frees the index words, the word list and the offset table.  The
// members are reset afterwards, so calling it again is harmless.
// returns: always 1
int MyThes::thCleanup()
{
    /* first close the data file */
    if (pdfile)
    {
        fclose(pdfile);
        pdfile=NULL;
    }

    /* now free up all the allocated strings on the list */
    if (list)
    {
        for (int i=0; i < nw; i++)
        {
            if (list[i])
            {
                free(list[i]);
                list[i] = 0;
            }
        }
        free((void*)list);
        list = NULL;
    }

    if (offst)
    {
        free((void*)offst);
        offst = NULL;
    }

    nw = 0;
    return 1;
}


// lookup text in index and count of meanings and a list of meaning entries
// with each entry having a synonym count and pointer to an
// array of char * (i.e the synonyms)
//
// note: calling routine should call CleanUpAfterLookup with the original
// meaning point and count to properly deallocate memory

int MyThes::Lookup(const char * pText, int len, mentry** pme)
{

    *pme = NULL;

    // handle the case of missing file or file related errors
    if (! pdfile)
        return 0;

    long offset = 0;

    /* copy search word and make sure null terminated */
    char * wrd = (char *) calloc(1,(len+1));
    memcpy(wrd,pText,len);

    /* find it in the list */
    int idx = binsearch(wrd,list,nw);
    free(wrd);
    if (idx < 0)
        return 0;

    // now seek to the offset
    offset = (long) offst[idx];
    int rc = fseek(pdfile,offset,SEEK_SET);
    if (rc)
    {
        return 0;
    }

    // grab the count of the number of meanings
    // and allocate a list of meaning entries
    char * buf = NULL;
    buf  = (char *) malloc( MAX_LN_LEN );
    if (!buf)
        return 0;
    readLine(pdfile, buf, (MAX_LN_LEN-1));
    int np = mystr_indexOfChar(buf,'|');
    if (np < 0)
    {
        free(buf);
        return 0;
    }
    int nmeanings = atoi(buf+np+1);
    *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
    if (!(*pme))
    {
        free(buf);
        return 0;
    }

    // now read in each meaning and parse it to get defn, count and synonym lists
    mentry* pm = *(pme);
    char dfn[MAX_WD_LEN];

    for (int j = 0; j < nmeanings; j++)
    {
        readLine(pdfile, buf, (MAX_LN_LEN-1));

        pm->count = 0;
        pm->psyns = NULL;
        pm->defn = NULL;

        // store away the part of speech for later use
        char * p = buf;
        char * pos = NULL;
        np = mystr_indexOfChar(p,'|');
        if (np >= 0)
        {
            *(buf+np) = '\0';
            pos = mystrdup(p);
            p = p + np + 1;
        }
        else
        {
            pos = mystrdup("");
        }

        // count the number of fields in the remaining line
        int nf = 1;
        char * d = p;
        np = mystr_indexOfChar(d,'|');
        while ( np >= 0 )
        {
            nf++;
            d = d + np + 1;
            np = mystr_indexOfChar(d,'|');
        }
        pm->count = nf;
        pm->psyns = (char **) malloc(nf*sizeof(char*));

        // fill in the synonym list
        d = p;
        for (int j = 0; j < nf; j++)
        {
            np = mystr_indexOfChar(d,'|');
            if (np > 0)
            {
                *(d+np) = '\0';
                pm->psyns[j] = mystrdup(d);
                d = d + np + 1;
            }
            else
            {
                pm->psyns[j] = mystrdup(d);
            }
        }

        // add pos to first synonym to create the definition
        int k = strlen(pos);
        int m = strlen(pm->psyns[0]);
        if ((k+m) < (MAX_WD_LEN - 1))
        {
            strncpy(dfn,pos,k);
            *(dfn+k) = ' ';
            strncpy((dfn+k+1),(pm->psyns[0]),m+1);
            pm->defn = mystrdup(dfn);
        }
        else
        {
            pm->defn = mystrdup(pm->psyns[0]);
        }
        free(pos);
        pm++;

    }
    free(buf);

    return nmeanings;
}

// Release a meaning list produced by Lookup().
//
//   pme       - address of the pointer that Lookup() filled in; the
//               array, every synonym string, every synonym array and
//               every definition are freed and *pme is set to NULL
//   nmeanings - number of entries in the array (Lookup()'s return value)
//
// Does nothing when pme or *pme is NULL or nmeanings is 0.
void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
{

    if (!pme || (nmeanings == 0) || ((*pme) == NULL))
        return;

    mentry * pm = *pme;

    for (int i = 0; i < nmeanings; i++, pm++)
    {
        // psyns may be NULL although count is set (failed allocation),
        // so check the array before touching its elements
        if (pm->psyns)
        {
            for (int j = 0; j < pm->count; j++)
            {
                if (pm->psyns[j])
                    free(pm->psyns[j]);
            }
            free(pm->psyns);
            pm->psyns = NULL;
        }
        if (pm->defn)
            free(pm->defn);
        pm->defn = NULL;
        pm->count = 0;
    }

    free(*pme);
    *pme = NULL;
}

// Fetch the next line from pf into buf with fgets, then let mychomp
// remove the trailing line terminator so buf holds a plain null
// terminated string.
//
//   pf  - stream to read from
//   buf - destination buffer
//   nc  - size handed to fgets (which stores at most nc-1 characters
//         plus the terminator)
//
// returns: length of the string now in buf, or -1 when fgets delivers
//          nothing (end of file or read error)

int MyThes::readLine(FILE * pf, char * buf, int nc)
{
    if (fgets(buf,nc,pf) == NULL)
        return -1;

    mychomp(buf);
    return strlen(buf);
}



//  performs a binary search on null terminated character
//  strings
//
//    sw   - word to search for
//    list - array of nlst strings, ordered as strcmp orders them
//    nlst - number of entries in list
//
//  returns: -1 on not found (also for a NULL argument or an empty list)
//           index of wrd in the list[]

int MyThes::binsearch(char * sw, char* list[], int nlst)
{
    // an empty list has no first or last element to compare against
    if (!sw || !list || (nlst <= 0))
        return -1;

    int lp = 0;
    int up = nlst-1;
    while (lp <= up)
    {
        // midpoint computed without adding lp and up, which could overflow
        int mp = lp + ((up - lp) >> 1);
        int j = strcmp(sw,list[mp]);
        if (j > 0)
            lp = mp + 1;
        else if (j < 0)
            up = mp - 1;
        else
            return mp;
    }
    return -1;
}

// Accessor for the encoding member.
// returns: the stored encoding string, or NULL when none is set
char * MyThes::get_th_encoding()
{
    // a NULL member is handed back as NULL, so no branch is needed
    return encoding;
}

